## Loading the class labels sent by Forrest
# Per-synapse class labels; row 18 is dropped -- presumably a known bad
# annotation. NOTE(review): confirm why observation 18 is excluded.
cl <- read.csv("cleft_class.csv")[-18,]
# F0 feature table for the collman15v2 tight annotations.
f0 <- read.csv("collman15v2_tightAnnotationsF0.csv")
# Column-wise z-scoring so features are on a comparable scale.
sf0 <- scale(f0, center = TRUE, scale = TRUE)
# Spatial locations of the annotations (assumes x/y/z columns -- TODO confirm).
loc <- read.csv("locationstest20171211.csv")
# Tabulate the pre-synaptic GABA labels (0/1 coding).
table(cl$gaba)
##
## 0 1
## 214 22
# Tabulate the post-synaptic GABA labels.
table(cl$postgaba)
##
## 0 1
## 212 24
# Channel colour palette: four groups of three consecutive channels each
# (blue, red, black, green), passed to the meda plotting functions below.
ccol <- rep(c("blue", "red", "black", "green"), each = 3)
# Fix the RNG so the exploratory pipeline is reproducible.
set.seed(1030)
# Run the full exploratory battery on the scaled features.
# NOTE(review): runAll, mlocation, pairhex, plotDend, stackM and
# clusterMeans are not defined in this file -- presumably from the meda
# package; verify before running standalone.
L <- runAll(sf0, ccol = ccol)
# Replace the first result with a location plot computed on the RAW
# (unscaled) features instead of sf0.
L[[1]] <- mlocation(f0, ccol = ccol)
plot(L[[1]])
plot(L[[2]])
plot(L[[3]])
plot(L[[4]])
# Hex-binned pairs plot of the scaled features (shown instead of L[[5]]).
pairhex(sf0)
plot(L[[6]])
# Dendrogram of the clustering stored in L[[7]].
plotDend(L[[7]])
## 1 21 22
## 0.7712 0.1314 0.0975
# Per-observation cluster labels, used to colour the pairs plot below.
c1 <- L[[7]]$dat$labels$col
pairs(sf0, col = viridis(max(c1))[c1],
pch = 19, cex = 0.2)
# Cluster-stacked means plot and per-cluster means at depth 2.
stackM(L[[7]], ccol = ccol, centered = TRUE, depth = 2)
clusterMeans(L[[7]], ccol = ccol)
# Re-seed and fit a depth-2 hierarchical clustering directly so the
# two-cluster labels can be compared against the GABA labels.
set.seed(1030)
h2 <- hmc(sf0, maxDepth = 2)
# Shift cluster labels down by one so they use the same 0/1 coding as cl$gaba.
l2 <- h2$dat$labels$col - 1
# Observed Adjusted Rand Index between clustering and GABA labels.
p0 <- mclust::adjustedRandIndex(l2, cl$gaba)
# Null distribution: ARI under 10,000 random permutations of the cluster
# labels. NOTE(review): %dopar% needs a registered parallel backend
# (e.g. doParallel) -- not visible in this chunk.
perms <- foreach(i = 1:1e4, .combine = c) %dopar% {
# Per-iteration seed so each worker's permutation is reproducible.
set.seed(i*2)
mclust::adjustedRandIndex(sample(l2), cl$gaba)
}
# Raw cluster labels from the depth-2 fit, reused for plots and tables below.
tmp <- h2$dat$labels$col
# Pairs plot of the clustered data, coloured by cluster assignment.
pairs(h2$dat$data,
col = viridis(max(tmp))[tmp],
pch = 19, cex = 0.25)
# Dendrogram of the depth-2 clustering.
plotDend(h2)
## 1 2
## 0.8093 0.1907
# Cluster-stacked means plot for the two clusters.
stackM(h2, depth = 2, centered = TRUE, ccol = ccol)
# Permutation null distribution of ARI, with the observed value in red;
# xlim padded 25% past p0 so the observed line is visible.
hist(perms, xlim = c(min(perms), p0 + 0.25*p0),
main = "permutation test of ARI values")
abline(v = p0, col = 'red')
# Assemble a data frame of synapse locations with truth and predicted labels
# for spatial visualization.
df1 <- data.frame(loc)
df1$gaba <- as.factor(cl$gaba)
# Cluster assignment recoded to 0/1 to match the gaba coding.
df1$classification <- as.factor(tmp -1)
# NOTE(review): `links` is not defined anywhere in this chunk -- presumably
# created earlier in the full document (hover-text URLs?); verify.
df1$text <- links
# Scatter of x/y positions faceted by z-slice, colour = true GABA label,
# shape = cluster assignment.
p1 <- ggplot(df1, aes(x = x, y=y,z=z, col = gaba, shape = classification)) +
facet_wrap(~ z, ncol = 6) +
geom_point()
p1
# Interactive version of the same plot.
ggplotly(p1)
#p2 <- plot_ly(df1, x = ~x, y = ~y, color = ~gaba,
# hoverinfo = 'text',
# text = ~links)
#p2
#
#htmlwidgets::saveWidget(as_widget(p2), "links.html")
# Confusion table: cluster assignment (rows) vs true GABA label (columns).
table(classification = tmp-1, df1$gaba)
##
## classification 0 1
## 0 189 2
## 1 25 20
# Supervised check: can a random forest recover the GABA label from the
# scaled features?
set.seed(317)
rfdat <- data.table(gaba = as.factor(cl$gaba), sf0)
# Forest on the full data. NOTE(review): rf1 is never used later in this
# chunk -- possibly inspected interactively or elsewhere in the document.
rf1 <- randomForest(gaba ~ ., data = rfdat)
#set.seed(317)
# Hold-out evaluation: 100 random training rows, remainder as test set.
set.seed(2^13)
train <- sample(nrow(rfdat), 100)
test <- setdiff(1:nrow(rfdat), train)
# Class balance of the test set.
table(rfdat[test,]$gaba)
##
## 0 1
## 123 13
# Fit on the training split; importance = TRUE enables varImpPlot below.
rf2 <- randomForest(gaba ~ ., data = rfdat[train,], importance = TRUE)
print(rf2)
##
## Call:
## randomForest(formula = gaba ~ ., data = rfdat[train, ], importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 3%
## Confusion matrix:
## 0 1 class.error
## 0 91 0 0.0000000
## 1 3 6 0.3333333
# Predict on the held-out rows (response column dropped via data.table
# column exclusion) and tabulate predictions against truth.
rf2.pred <- predict(rf2, newdata = rfdat[test,-c('gaba')])
table(rf2.pred, rfdat[test,]$gaba)
##
## rf2.pred 0 1
## 0 122 2
## 1 1 11
# Variable-importance plot for the trained forest.
varImpPlot(rf2)